import pandas as pd
import numpy as np
import sys
from sklearn.preprocessing import LabelEncoder


# Read the input CSV whose path is given as the first command-line argument.
input_csv_path = sys.argv[1]
df = pd.read_csv(input_csv_path)

def label_a_colume(data_frame, colume_name: str):
    """Replace one column of *data_frame* with its one-hot (dummy) columns.

    The dummy columns are produced by ``pd.get_dummies`` from the column
    taken out of ``data_frame`` itself, so the function works on whatever
    frame the caller passes in rather than on a module-level global.

    Args:
        data_frame: The frame containing the column to encode. It is not
            modified; a new frame is returned.
        colume_name: Name of the column to encode and then remove.

    Returns:
        A new DataFrame holding the remaining original columns followed by
        the dummy columns generated for ``colume_name``.

    Raises:
        KeyError: If ``colume_name`` is not a column of ``data_frame``.
    """
    # Encode from the argument, not from the global ``df``.
    new_part = pd.get_dummies(data_frame[[colume_name]])
    encoded = data_frame.join(new_part)
    return encoded.drop(columns=[colume_name])
    

# Columns to be replaced by their dummy columns, processed in this order.
COLUMNS_TO_ENCODE = (
    "workclass",
    "education",
    "marital-status",
    "occupation",
    "relationship",
    "race",
    "sex",
    "native-country",
    "salary",
)

for column_to_encode in COLUMNS_TO_ENCODE:
    df = label_a_colume(df, column_to_encode)

# Show the final column layout; the CSV written below carries no header row.
print(df.columns)

# Write the encoded data without the index and without column names.
df.to_csv("./prepared_adult.csv", index=None, header=False)








